Introduction to hiddenmeta

Install and load packages

install.packages("DeclareDesign")
install.packages("igraph")

devtools::install_github("gerasy1987/hiddenmeta", build_vignettes = TRUE)
library(hiddenmeta)
library(DeclareDesign)
library(knitr)

Step 1. Provide study design features

## STUDY 1
study_1 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 1000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = 0,
               p_edge_within = list(known = c(0.1, 0.3), hidden = c(0.1, 0.3)),
               p_edge_between = list(known = 0.1, hidden = 0.1),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = 1),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25)",
         "purrr::map_df(hidden, ~ sapply( `names<-`(rep(0, times = 10), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
         known_2 = 0.3, known_3 = 0.3)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   add_seeds = 3,
                   target_type = "sample",
                   target_n_rds = 60),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_n_tls = 180,
                   cluster = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 200)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 100,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 1000,
                            rds_prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds"),
                             capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

Step 2. Declare study population

study_population <-
  eval(as.call(c(list(declare_population), study_1$pop)))

set.seed(872312)
example_pop <- study_population()

example_pop %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))

Show the network

g <-
  example_pop %$% {
    hiddenmeta:::retrieve_graph(links) %>%
      igraph::set_vertex_attr("name", value = name) %>%
      igraph::set_vertex_attr("type", value = type)
  }

igraph::V(g)$color <-
  plyr::mapvalues(igraph::V(g)$type,
                  from = unique(igraph::V(g)$type),
                  to = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), 
                                                 palette = "Set 3"))

plot(g,
     # layout = igraph::layout_on_grid(g, dim = 2, width = 100),
     layout = igraph::layout_on_grid(g, dim = 2, width = 150),
     vertex.size = 1.5, vertex.dist = 4, vertex.label = NA, edge.width = .2,
     edge.arrow.size = .2, edge.curved = .2)

legend(x = -1, y = -1.2,
       legend = c("none", "known only", "hidden only", "both"),
       pt.bg = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), palette = "Set 3"),
       pch = 21, col = "#777777", pt.cex = 1, cex = 1, bty = "o", ncol = 2)

Step 3. Declare all relevant study sampling procedures

The sampling procedures are additive in a sense that each procedure appends several columns relevant to the sampling procedure and particular draw based on population simulation, but does not change the study population data frame (unless you specify drop_nonsampled = TRUE).

study_sample_rds <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$rds)))

set.seed(872312)
draw_data(study_population + study_sample_rds) %>%  
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))
study_sample_pps <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$pps)))

set.seed(872312)
draw_data(study_population + study_sample_rds + study_sample_pps) %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))
study_sample_tls <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$tls)))

set.seed(872312)
draw_data(study_population + 
            study_sample_rds + study_sample_pps + study_sample_tls) %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))

Step 4. Declare study level estimands

study_estimands <- 
  eval(as.call(c(list(declare_inquiry), study_1$inquiries)))

set.seed(872312)
draw_estimands(study_population + 
                 # study_sample_rds + study_sample_pps + study_sample_tls + 
                 study_estimands) %>% 
  kable(digits = 2)
inquiry estimand
known_size 318.00
hidden_size 100.00
known_prev 0.32
hidden_prev 0.10
hidden_size_in_known 38.00
hidden_prev_in_known 0.12

Step 5. Declare estimators used in the study

est_sspse <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$sspse)))
est_chords <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$chords)))
est_multi <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$multiplier)))

est_ht_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$ht)))
est_nsum_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$nsum)))

est_ht_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$ht)))
est_nsum_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$nsum)))
est_recap_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$recap)))

est_recap_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap1)))
est_recap_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap2)))

set.seed(872312)
draw_estimates(study_population +
                 study_sample_rds + study_sample_pps + study_sample_tls +
                 study_estimands +
                 est_sspse + est_chords + est_multi +
                 est_nsum_pps + est_ht_pps +
                 est_nsum_tls + est_ht_tls + est_recap_tls +
                 est_recap_rds_pps + est_recap_rds_tls) %>%
  kable(digits = 2)
estimator estimate se inquiry
hidden_size_rds_sspse 104.00 26.30 hidden_size
hidden_size_rds_chords 37.00 24.05 hidden_size
hidden_size_rds_multi 104.00 29.33 hidden_size
hidden_size_pps_nsum 101.82 6.32 hidden_size
hidden_size_pps_ht 85.00 19.85 hidden_size
hidden_prev_pps_ht 0.09 0.02 hidden_prev
hidden_size_tls_nsum 106.11 2.37 hidden_size
hidden_size_tls_ht 4.03 0.85 hidden_size
hidden_prev_tls_ht 0.00 0.00 hidden_prev
hidden_size_tls_recap 51.07 21.46 hidden_size
hidden_size_rds_pps_recap 98.82 16.27 hidden_size
hidden_size_rds_tls_recap 98.83 9.77 hidden_size

Step 6. Declare full study

study1 <- 
  study_population +
  study_sample_rds + study_sample_pps + study_sample_tls +
  study_estimands +
  est_sspse + est_chords + est_multi +
  est_nsum_pps + est_ht_pps +
  est_nsum_tls + est_ht_tls + est_recap_tls +
  est_recap_rds_pps + est_recap_rds_tls

# can draw one of the samples
study1$study_sample_rds(study1$study_population())
#> # A tibble: 1,000 × 84
#>     name type  known hidden links      service_use loc_1 loc_2 loc_3 loc_4 loc_5
#>    <int> <chr> <int>  <int> <chr>            <int> <int> <int> <int> <int> <int>
#>  1     1 00        0      0 11;136;41…           0     0     0     0     0     1
#>  2     2 00        0      0 88;181;24…           0     0     0     1     0     0
#>  3     3 00        0      0 41;82;260…           1     0     0     0     0     0
#>  4     4 00        0      0 24;212;61…           0     0     0     0     0     0
#>  5     5 00        0      0 43;69;72;…           0     1     0     0     0     0
#>  6     6 00        0      0 53;561;60…           1     0     0     0     1     0
#>  7     7 00        0      0 101;177;2…           0     0     0     0     0     0
#>  8     8 00        0      0 11;157;33…           0     0     0     0     0     1
#>  9     9 00        0      0 14;311;35…           0     0     0     0     0     0
#> 10    10 00        0      0 30;161;21…           0     0     0     0     0     1
#> # … with 990 more rows, and 73 more variables: loc_6 <int>, loc_7 <int>,
#> #   loc_8 <int>, loc_9 <int>, loc_10 <int>, known_2 <int>, known_3 <int>,
#> #   n_visible_out <dbl>, known_visible_out <dbl>, hidden_visible_out <dbl>,
#> #   type_00_visible_out <dbl>, type_01_visible_out <dbl>,
#> #   type_10_visible_out <dbl>, type_11_visible_out <dbl>,
#> #   service_use_visible_out <dbl>, loc_1_visible_out <dbl>,
#> #   loc_2_visible_out <dbl>, loc_3_visible_out <dbl>, …
# can draw full data
study1_data <- draw_data(study1)

# can draw estimands
study1$inquiry(study1_data)
#>                inquiry     estimand
#> 1           known_size 313.00000000
#> 2          hidden_size  95.00000000
#> 3           known_prev   0.31300000
#> 4          hidden_prev   0.09500000
#> 5 hidden_size_in_known  22.00000000
#> 6 hidden_prev_in_known   0.07028754
# can draw any of the estimators
study1$rds_sspse(study1_data)
#>               estimator estimate       se     inquiry
#> 1 hidden_size_rds_sspse    150.5 92.57726 hidden_size
study1$pps_ht(study1_data)
#>            estimator estimate          se     inquiry
#> 1 hidden_size_pps_ht   110.00 23.66712443 hidden_size
#> 2 hidden_prev_pps_ht     0.11  0.02366712 hidden_prev
study1$rds_pps_recap(study1_data)
#>                   estimator estimate       se     inquiry
#> 1 hidden_size_rds_pps_recap 92.53333 11.97572 hidden_size

Step 7. Diagnose study design

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 7)

set.seed(872312)
study1_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(study1, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(study1_simulations, "vignettes/study1_sim.rds")
study_diagnosands <-
 declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2))
  )

study1_simulations <- readRDS(here::here("vignettes/study1_sim.rds"))

diagnose_design(simulations_df = study1_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis %>% select(-'Design') %>%
  kable()
Inquiry Estimator N Sims Mean Estimand Mean Estimate SD Estimate Mean Se Bias RMSE
hidden_prev hidden_prev_pps_ht 100 0.10 0.11 0.03 0.02 0.01 0.03
(0.00) (0.00) (0.00) (0.00) (0.00) (0.00)
hidden_prev hidden_prev_tls_ht 100 0.10 0.00 0.00 0.00 -0.09 0.09
(0.00) (0.00) (0.00) (0.00) (0.00) (0.00)
hidden_prev_in_known NA 100 0.09 NA NA NA NA NA
(0.00) NA NA NA NA NA
hidden_size hidden_size_pps_ht 100 95.17 106.50 28.15 21.77 11.33 27.10
(0.57) (2.75) (2.32) (0.27) (2.45) (2.18)
hidden_size hidden_size_pps_nsum 100 95.17 99.37 12.57 6.71 4.20 9.68
(0.57) (1.14) (1.04) (0.07) (0.76) (0.72)
hidden_size hidden_size_rds_chords 100 95.17 77.37 38.38 35.41 -17.80 43.65
(0.57) (3.95) (2.28) (0.81) (4.10) (1.92)
hidden_size hidden_size_rds_multi 100 95.17 91.42 13.31 23.72 -3.75 13.59
(0.57) (1.39) (1.17) (1.02) (1.39) (0.98)
hidden_size hidden_size_rds_pps_recap 100 95.17 101.07 19.64 15.32 5.90 17.13
(0.57) (1.90) (1.40) (0.60) (1.62) (1.28)
hidden_size hidden_size_rds_sspse 100 95.17 159.71 32.63 92.65 64.54 72.41
(0.57) (3.03) (4.67) (3.82) (3.04) (4.44)
hidden_size hidden_size_rds_tls_recap 100 95.17 98.38 11.34 10.23 3.21 11.04
(0.57) (1.23) (0.78) (0.29) (1.12) (0.83)
hidden_size hidden_size_tls_ht 100 95.17 3.43 1.15 0.90 -91.74 91.91
(0.57) (0.11) (0.07) (0.02) (0.53) (0.54)
hidden_size hidden_size_tls_nsum 100 95.17 98.73 14.26 7.60 3.56 11.03
(0.57) (1.53) (1.10) (0.31) (1.14) (0.86)
hidden_size hidden_size_tls_recap 100 95.17 33.72 15.29 13.19 -61.45 63.34
(0.57) (1.76) (3.00) (1.54) (1.70) (1.25)
hidden_size_in_known NA 100 27.35 NA NA NA NA NA
(0.34) NA NA NA NA NA
known_prev NA 100 0.30 NA NA NA NA NA
(0.00) NA NA NA NA NA
known_size NA 100 301.35 NA NA NA NA NA
(0.97) NA NA NA NA NA

Multi study declaration

Additional study

study_2 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 5000, K = 3, 
               prev_K = c(frame = .5, known = .2, hidden = .1), 
               rho_K = c(.05, .05, .05),
               p_edge_within = list(frame = c(0.05, 0.05), 
                                    known = c(0.1, 0.05), 
                                    hidden = c(0.2, 0.9)),
               p_edge_between = list(frame = 0.05, 
                                     known = 0.1, 
                                     hidden = 0.01),
               directed = FALSE),
        
        group_names = c("frame", "known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(frame = 1, known = 1, hidden = .6),
        
        # probability of service utilization in hidden population
        # for service multiplier
        
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.3)",
               "purrr::map_df(hidden, ~ sapply( `names<-`(runif(10, 0.05,.3), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden",
                   n_seed = 100,
                   n_coupons = 3,
                   target_type = "waves",
                   target_n_rds = 2),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_n_tls = 300,
                   cluster = paste0("loc_", 1:8)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = "frame",
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 800)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$|^frame$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse, 
                            label = "rds_sspse",
                            prior_mean = 450,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 5000,
                            rds_prefix = "rds"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             label = "rds_chords",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "frame"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:8),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(
            ht = list(handler = get_study_est_ht, 
                      label = "pps_ht"),
            nsum = list(handler = get_study_est_nsum,
                        known = c("frame", "known"),
                        hidden = "hidden_visible_out",
                        survey_design = ~ pps_cluster + strata(pps_strata),
                        n_boot = 100,
                        label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:8)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

study_3 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 2000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = .3,
               p_edge_within = list(known = c(0.05, 0.1), hidden = c(0.05, 0.7)),
               p_edge_between = list(known = 0.05, hidden = 0.05),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .7),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.05)",
         "purrr::map_df(hidden, ~ sapply( `names<-`(c(.2, .1, .3, .2), paste0('loc_', 1:4)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
         known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   target_type = "sample",
                   target_n_rds = 100),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 3,
                   target_n_tls = 300,
                   cluster = paste0("loc_", 1:4)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 400)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 2000,
                            rds_prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:4),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:4)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

Declare multiple studies

multi_population <- 
  declare_population(handler = get_multi_populations, 
                     pops_args = list(study_1 = study_1$pop,
                                      study_2 = study_2$pop,
                                      study_3 = study_3$pop))

multi_sampling <- 
  declare_sampling(handler = get_multi_samples, 
                   samples_args = list(study_1 = study_1$sample,
                                       study_2 = study_2$sample,
                                       study_3 = study_3$sample)) 

multi_inquiry <- 
  declare_inquiry(handler = get_multi_estimands, 
                  inquiries_args = list(study_1 = study_1$inquiries,
                                        study_2 = study_2$inquiries,
                                        study_3 = study_3$inquiries)) 

multi_estimators <-
  declare_estimator(handler = get_multi_estimates,
                    estimators_args = list(study_1 = study_1$estimators,
                                           study_2 = study_2$estimators,
                                           study_3 = study_3$estimators))

multi_study <- multi_population + multi_sampling + multi_inquiry + multi_estimators

# set.seed(872312)
# draw_estimands(multi_study) %>% 
#   kable()
# 
# draw_estimates(multi_study) %>% 
#   kable()

Diagnose multiple studies

# multi_simulations <- simulate_design(multi_study, sims = 2)

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
multi_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(multi_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(multi_simulations, file = "vignettes/multi_sim.rds")
multi_simulations <- readRDS(here::here("vignettes/multi_sim.rds"))

diagnose_design(simulations_df = multi_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Meta-analysis structure

  1. Conduct multi-study design for as many sampling-estimator pairs in each study as possible, then diagnose the multi-study design. Calculate average (across simulations) estimand and bias of sampling-estimator for each of the studies and estimator sampling strategies. These will serve as population we will be drawing population-sampling-estimator triads

  2. Estimands include:

    • Average estimand by inquiry label (within study)
    • Average bias of specific sampling-estimator pair (across studies) compared to truth
    • Average relative bias of sampling-estimator pair (across studies) compared to “gold standard”
    • Ratio of average bias to costs of sampling-estimator pair
  3. As mentioned before sampling will consist of drawing population-sampling-estimator triads from the population presuming that each study uses at least two sampling strategies at a time

  4. Once we draw sample we use Stan model to estimate study-specific estimands and sampling-estimator specific errors (biases)

  5. We have all parts to conduct the full cycle of meta-analyses

meta_population <- 
  declare_population(multi_design = multi_study, n_sim = 3, parallel = FALSE, 
                     handler = get_meta_population)

meta_inquiry <- 
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht", 
                  handler = get_meta_estimands)

meta_sample <- 
  declare_sampling(sampling_variable = "meta",
                   selection_variables = c("sample", "estimator"),
                   samples_per_study = 2, estimator_per_sample = 3, 
                   force = list(sample = "pps", estimator = "ht"),
                   handler = get_meta_sample)

meta_estimator <- 
  declare_estimator(sampling_variable = "meta",
                    which_estimand = "hidden_size",
                    benchmark = list(sample = "pps", estimator = "ht"),
                    parallel = FALSE,
                    stan_handler = get_meta_stan,
                    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

# set.seed(872312)
# draw_estimands(meta_study) %>% 
#   kable()
# 
# draw_estimates(meta_study) %>% 
#   kable()
requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(meta_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = "vignettes/meta_sim.rds")
meta_simulations <- readRDS(here::here("vignettes/meta_sim.rds"))

diagnose_design(simulations_df = meta_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))